# Package setup.
# corpcor and mctest are installed only when they are missing, so re-running
# the script does not contact CRAN every time (an unconditional
# install.packages() also fails in a non-interactive session that has no CRAN
# mirror configured).
for (pkg in c('corpcor', 'mctest')) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# library() stops with an error when a package cannot be loaded; require()
# would only return FALSE and let the script fail later at the first call.
library(mgcv)     # provides gam(), used for every model below
library(corpcor)  # not referenced in the code visible here; kept loaded as before
library(mctest)   # not referenced in the code visible here; kept loaded as before


#Importing datasets
all_sellerdata = read.csv(file="C:\\Users\\f00456n\\Documents\\Amazon Price Dynamics\\Data Scraped\\New Data\\Modeling data sets\\Deep Fryer\\finalnumsellers_modeldata_deepfryer.csv", header=TRUE, sep=",")

# Rows 4 to 138 of the file form the modelling sample.
# NOTE(review): the reason for skipping rows 1-3 is not shown here (lagged
# columns being undefined on the first rows is one possibility) - confirm.
model_rows = 4:138

# Indexing past the end of a data frame returns all-NA rows, which the NA -> 0
# replacement below would turn into fabricated all-zero observations. Fail
# loudly instead of fitting models on rows that do not exist.
if (nrow(all_sellerdata) < max(model_rows)) {
  stop("Input file has ", nrow(all_sellerdata), " rows but rows ",
       min(model_rows), "-", max(model_rows), " are required.", call. = FALSE)
}

modeldata = all_sellerdata[model_rows, ]

# Missing values are replaced by 0 in every column.
# NOTE(review): this treats "not observed" as a true zero - confirm that is
# appropriate for price / rank columns.
modeldata[is.na(modeldata)] = 0

# The rest of the script refers to columns by bare name, so modeldata is put on
# the search path. Detach a copy left over from an earlier run first, otherwise
# every re-run stacks another (possibly stale) copy on the search path.
if ("modeldata" %in% search()) {
  detach("modeldata", character.only = TRUE)
}
attach(modeldata)


#Tfal: number of sellers

# Candidate predictors for the Tfal number-of-sellers model.
# Columns are taken from modeldata explicitly (with()) rather than through the
# attached search path, so a same-named global variable cannot shadow them.
# cbind() builds a matrix first, so every column is coerced to one common type;
# assumes all listed columns are numeric - TODO confirm.
data_numsellers_tfal = with(modeldata, data.frame(cbind(
  Y_amzn_tfal_lag1,
  Y_clust2_tfal_lag1,
  incr10per_amzn_tfal,
  incr10per_3p_tfal,
  incr20per_amzn_tfal,
  incr20per_3p_tfal,
  incr5per_amzn_tfal,
  incr5per_3p_tfal,
  decr10per_amzn_tfal,
  decr10per_3p_tfal,
  decr20per_amzn_tfal,
  decr20per_3p_tfal,
  decr5per_amzn_tfal,
  decr5per_3p_tfal,
  buyboxprice_tfal_lag1,
  nonbbox3pprice_tfal_lag,
  priceAmzn_tfal_lag,
  tfal_bottombrandlag1,
  tfal_topbrandlag1,
  csnart_bottombrandlag1,
  csnart_topbrandlag1,
  hbeach_bottombrandlag1,
  hbeach_topbrandlag1,
  ep_bottombrandlag1,
  ep_topbrandlag1,
  presto_bottombrandlag1,
  presto_topbrandlag1,
  secura_bottombrandlag1,
  secura_topbrandlag1,
  weekend,
  #more additional variables - in absence of review text
  Answered_Questionstfal_lag1,
  Product_reviews_tfal_lag1,
  productstar_tfal_lag1,
  salerank_tfal_lag1,
  saleranksubcat_tfal_lag1,
  num_tfalused3p_lag1,
  seasonal_sale
)))


#VARIANCE CHECK
# Sample variance of every candidate column (named by column). vapply() also
# copes with an empty frame, where a 1:ncol() loop would index column 1 and 0.
variance_X = vapply(data_numsellers_tfal, var, numeric(1))

nonzero_var = sum(variance_X != 0) #number of variables with non-zero variance

#Retaining variables with non-zero variance
# drop = FALSE keeps a data frame even when a single column survives; without
# it the result collapses to a bare vector and can no longer be passed as
# gam(data = ...).
trainX_retained = data_numsellers_tfal[, variance_X != 0, drop = FALSE]

data_numsellers_tfal = trainX_retained

# Tfal number-of-sellers model: Gaussian family, identity link, fitted by REML.
# The formula has no s() terms, so every predictor enters linearly.
# Commented-out terms are candidates currently excluded from the model; their
# original '#' / '##' markers are preserved.
formula_numsellers_tfal = NUM_SELLERS_Tfal ~
  ##Y_amzn_tfal_lag1 +
  # Y_clust2_tfal_lag1 +
  # hbeach_topbrandlag1 +
  #secura_topbrandlag1 +
  tfal_topbrandlag1 +
  # ep_bottombrandlag1 +
  # presto_bottombrandlag1 +
  #decr10per_3p_tfal +
  # decr10per_amzn_tfal +
  ## decr5per_3p_tfal +
  # decr5per_amzn_tfal +
  # incr20per_3p_tfal +
  incr20per_amzn_tfal +
  decr20per_amzn_tfal +
  incr5per_amzn_tfal +
  # incr10per_3p_tfal +
  incr10per_amzn_tfal +
  ## incr5per_3p_tfal +
  # incr5per_amzn_tfal +
  ##  weekend +
  #additional variables
  nonbbox3pprice_tfal_lag +
  priceAmzn_tfal_lag +
  Answered_Questionstfal_lag1 +
  #buyboxprice_tfal_lag1 +
  Product_reviews_tfal_lag1 +
  productstar_tfal_lag1 +
  salerank_tfal_lag1 +
  ## saleranksubcat_tfal_lag1 +
  seasonal_sale

# data_numsellers_tfal holds predictors only. Add the response so the model
# frame is built from one data frame instead of relying on attach(modeldata).
fitdata_numsellers_tfal = data_numsellers_tfal
if ("NUM_SELLERS_Tfal" %in% names(modeldata)) {
  fitdata_numsellers_tfal[["NUM_SELLERS_Tfal"]] = modeldata[["NUM_SELLERS_Tfal"]]
}

# Any variable missing from the fit data is looked up in the calling
# environment / search path instead. That silently re-admits predictors the
# zero-variance filter removed, so make it visible.
outside_terms_tfal = setdiff(all.vars(formula_numsellers_tfal),
                             names(fitdata_numsellers_tfal))
if (length(outside_terms_tfal) > 0) {
  warning("Tfal model: variables not in the fit data (resolved from the ",
          "environment instead): ",
          paste(outside_terms_tfal, collapse = ", "), call. = FALSE)
}

gam_numsellers_tfal = gam(formula_numsellers_tfal,
                          family = gaussian(link ='identity'),
                          data = fitdata_numsellers_tfal,
                          method = "REML",
                          optimizer = c("outer","newton"),
                          fit = TRUE)


summary(gam_numsellers_tfal)


#Cuisinart: number of sellers

# Candidate predictors for the Cuisinart number-of-sellers model.
# Columns are taken from modeldata explicitly (with()) rather than through the
# attached search path, so a same-named global variable cannot shadow them.
# cbind() builds a matrix first, so every column is coerced to one common type;
# assumes all listed columns are numeric - TODO confirm.
# NOTE(review): the four/five "*_clust5_tfal" columns carry a _tfal suffix in
# this Cuisinart set - confirm these are the intended columns.
data_numsellers_csnart = with(modeldata, data.frame(cbind(
  incr10per_amzn_csnart,
  incr10per_clust1_csnart,
  incr10per_clust3_csnart,
  incr10per_clust5_tfal,
  incr20per_amzn_csnart,
  incr20per_clust1_csnart,
  incr20per_clust3_csnart,
  incr20per_clust5_tfal,
  incr5per_amzn_csnart,
  incr5per_clust1_csnart,
  incr5per_clust3_csnart,
  incr5per_clust5_tfal,
  decr10per_amzn_csnart,
  decr10per_clust1_csnart,
  decr10per_clust3_csnart,
  decr10per_clust5_tfal,
  decr20per_amzn_csnart,
  decr20per_clust1_csnart,
  decr20per_clust3_csnart,
  decr20per_clust5_tfal,
  decr5per_amzn_csnart,
  decr5per_clust1_csnart,
  decr5per_clust3_csnart,
  decr5per_clust5_tfal,
  buyboxprice_csnart_lag1,
  tfal_bottombrandlag1,
  tfal_topbrandlag1,
  csnart_bottombrandlag1,
  csnart_topbrandlag1,
  hbeach_bottombrandlag1,
  hbeach_topbrandlag1,
  ep_bottombrandlag1,
  ep_topbrandlag1,
  presto_bottombrandlag1,
  presto_topbrandlag1,
  secura_bottombrandlag1,
  secura_topbrandlag1,
  weekend,
  #more additional variables - in absence of review text
  nonbbox3pprice_csnart_lag,
  priceAmzn_csnart_lag,
  Answered_Questionscsnart_lag1,
  Product_reviews_csnart_lag1,
  productstar_csnart_lag1,
  salerank_csnart_lag1,
  saleranksubcat_csnart_lag1,
  num_csnartused3p_lag1,
  num_csnartusedamzn_lag1,
  seasonal_sale
)))


#VARIANCE CHECK
# Sample variance of every candidate column (named by column). vapply() also
# copes with an empty frame, where a 1:ncol() loop would index column 1 and 0.
variance_X = vapply(data_numsellers_csnart, var, numeric(1))

nonzero_var = sum(variance_X != 0) #number of variables with non-zero variance

#Retaining variables with non-zero variance
# drop = FALSE keeps a data frame even when a single column survives; without
# it the result collapses to a bare vector and can no longer be passed as
# gam(data = ...).
trainX_retained = data_numsellers_csnart[, variance_X != 0, drop = FALSE]

data_numsellers_csnart = trainX_retained

# Cuisinart number-of-sellers model: Gaussian family, identity link, fitted by
# REML. The formula has no s() terms, so every predictor enters linearly.
# Commented-out terms are candidates currently excluded from the model; their
# original '#' / '##' markers are preserved.
formula_numsellers_csnart = NUM_SELLERS_CSN ~
  Answered_Questionscsnart_lag1 +
  buyboxprice_csnart_lag1 +
  ##decr10per_amzn_csnart +
  ## decr10per_clust1_csnart +
  ## decr10per_clust3_csnart +
  decr20per_amzn_csnart +
  ## decr20per_clust1_csnart +
  ## decr5per_amzn_csnart +
  ##decr5per_clust1_csnart +
  ##decr5per_clust3_csnart +
  #ep_bottombrandlag1 +
  #hbeach_topbrandlag1 +
  incr10per_amzn_csnart +
  #  incr10per_clust1_csnart +
  #incr10per_clust3_csnart +
  incr20per_amzn_csnart +
  #incr20per_clust1_csnart +
  incr5per_amzn_csnart +
  #incr5per_clust1_csnart +
  #incr5per_clust3_csnart +
  #presto_bottombrandlag1 +
  Product_reviews_csnart_lag1 +
  nonbbox3pprice_csnart_lag +
  priceAmzn_csnart_lag +
  ##productstar_csnart_lag1 +
  salerank_csnart_lag1 +
  #saleranksubcat_csnart_lag1 +
  seasonal_sale +
  #secura_topbrandlag1 +
  ##tfal_topbrandlag1 +
  weekend

# data_numsellers_csnart holds predictors only. Add the response so the model
# frame is built from one data frame instead of relying on attach(modeldata).
fitdata_numsellers_csnart = data_numsellers_csnart
if ("NUM_SELLERS_CSN" %in% names(modeldata)) {
  fitdata_numsellers_csnart[["NUM_SELLERS_CSN"]] = modeldata[["NUM_SELLERS_CSN"]]
}

# Any variable missing from the fit data is looked up in the calling
# environment / search path instead. That silently re-admits predictors the
# zero-variance filter removed, so make it visible.
outside_terms_csnart = setdiff(all.vars(formula_numsellers_csnart),
                               names(fitdata_numsellers_csnart))
if (length(outside_terms_csnart) > 0) {
  warning("Cuisinart model: variables not in the fit data (resolved from the ",
          "environment instead): ",
          paste(outside_terms_csnart, collapse = ", "), call. = FALSE)
}

gam_numsellers_csnart = gam(formula_numsellers_csnart,
                            family = gaussian(link ='identity'),
                            data = fitdata_numsellers_csnart,
                            method = "REML",
                            optimizer = c("outer","newton"),
                            fit = TRUE)


summary(gam_numsellers_csnart)


#Hamilton Beach: number of sellers

# Candidate predictors for the Hamilton Beach number-of-sellers model.
# Columns are taken from modeldata explicitly (with()) rather than through the
# attached search path, so a same-named global variable cannot shadow them.
# cbind() builds a matrix first, so every column is coerced to one common type;
# assumes all listed columns are numeric - TODO confirm.
data_numsellers_hbeach = with(modeldata, data.frame(cbind(
  incr10per_amzn_hbeach,
  incr20per_amzn_hbeach,
  incr5per_amzn_hbeach,
  decr10per_amzn_hbeach,
  decr20per_amzn_hbeach,
  decr5per_amzn_hbeach,
  buyboxprice_Hbeach_lag1,
  tfal_bottombrandlag1,
  tfal_topbrandlag1,
  csnart_bottombrandlag1,
  csnart_topbrandlag1,
  hbeach_bottombrandlag1,
  hbeach_topbrandlag1,
  ep_bottombrandlag1,
  ep_topbrandlag1,
  presto_bottombrandlag1,
  presto_topbrandlag1,
  secura_bottombrandlag1,
  secura_topbrandlag1,
  weekend,
  #more additional variables - in absence of review text
  nonbbox3pprice_Hbeach_lag,
  priceAmzn_Hbeach_lag,
  Answered_QuestionsHbeach_lag1,
  Product_reviews_Hbeach_lag1,
  productstar_Hbeach_lag1,
  salerank_Hbeach_lag1,
  saleranksubcat_Hbeach_lag1,
  num_HBeachused3p_lag1,
  num_HBeachusedamzn_lag1,
  seasonal_sale
)))


#VARIANCE CHECK
# Sample variance of every candidate column (named by column). vapply() also
# copes with an empty frame, where a 1:ncol() loop would index column 1 and 0.
variance_X = vapply(data_numsellers_hbeach, var, numeric(1))

nonzero_var = sum(variance_X != 0) #number of variables with non-zero variance

#Retaining variables with non-zero variance
# drop = FALSE keeps a data frame even when a single column survives; without
# it the result collapses to a bare vector and can no longer be passed as
# gam(data = ...).
trainX_retained = data_numsellers_hbeach[, variance_X != 0, drop = FALSE]

data_numsellers_hbeach = trainX_retained

# Hamilton Beach number-of-sellers model: Gaussian family, identity link,
# fitted by REML. The formula has no s() terms, so every predictor enters
# linearly. Commented-out terms are candidates currently excluded from the
# model; their original '#' / '##' markers are preserved.
formula_numsellers_hbeach = NUM_SELLERS_Hbeach ~
  Answered_QuestionsHbeach_lag1 +
  buyboxprice_Hbeach_lag1 +
  decr10per_amzn_hbeach +
  # decr20per_amzn_hbeach +
  decr5per_amzn_hbeach +
  incr5per_amzn_hbeach +
  ## incr10per_amzn_hbeach +
  ##  incr20per_amzn_hbeach +
  #ep_bottombrandlag1 +
  #hbeach_topbrandlag1 +
  #num_HBeachusedamzn_lag1 +
  ## presto_bottombrandlag1 +
  Product_reviews_Hbeach_lag1 +
  nonbbox3pprice_Hbeach_lag +
  priceAmzn_Hbeach_lag +
  productstar_Hbeach_lag1 +
  salerank_Hbeach_lag1 +
  # saleranksubcat_Hbeach_lag1 +
  seasonal_sale +
  #secura_topbrandlag1 +
  #Y_amzn_hbeach_lag1 +
  ##tfal_topbrandlag1 +
  weekend

# data_numsellers_hbeach holds predictors only. Add the response so the model
# frame is built from one data frame instead of relying on attach(modeldata).
fitdata_numsellers_hbeach = data_numsellers_hbeach
if ("NUM_SELLERS_Hbeach" %in% names(modeldata)) {
  fitdata_numsellers_hbeach[["NUM_SELLERS_Hbeach"]] = modeldata[["NUM_SELLERS_Hbeach"]]
}

# Any variable missing from the fit data is looked up in the calling
# environment / search path instead. That silently re-admits predictors the
# zero-variance filter removed, so make it visible.
outside_terms_hbeach = setdiff(all.vars(formula_numsellers_hbeach),
                               names(fitdata_numsellers_hbeach))
if (length(outside_terms_hbeach) > 0) {
  warning("Hamilton Beach model: variables not in the fit data (resolved ",
          "from the environment instead): ",
          paste(outside_terms_hbeach, collapse = ", "), call. = FALSE)
}

gam_numsellers_hbeach = gam(formula_numsellers_hbeach,
                            family = gaussian(link ='identity'),
                            data = fitdata_numsellers_hbeach,
                            method = "REML",
                            optimizer = c("outer","newton"),
                            fit = TRUE)


summary(gam_numsellers_hbeach)

                            
#Presto - Number of sellers

# Candidate predictors for the Presto number-of-sellers model.
# Columns are taken from modeldata explicitly (with()) rather than through the
# attached search path, so a same-named global variable cannot shadow them.
# cbind() builds a matrix first, so every column is coerced to one common type;
# assumes all listed columns are numeric - TODO confirm.
data_numsellers_presto = with(modeldata, data.frame(cbind(
  incr10per_amzn_presto,
  incr20per_amzn_presto,
  incr5per_amzn_presto,
  decr10per_amzn_presto,
  decr20per_amzn_presto,
  decr5per_amzn_presto,
  mean_buyboxprice_presto_lag1,
  mean_nonbbox3p_presto_lag,
  # priceAmzn_presto_lag is a term of the Presto model formula but was missing
  # from this set, so it skipped the variance check and was only found through
  # attach(). The Tfal, Cuisinart and Hamilton Beach sets include their
  # priceAmzn_* column; do the same here.
  priceAmzn_presto_lag,
  weekend,
  presto_bottombrandlag1,
  #more additional variables - in absence of review text
  mean_ans_questions_presto_lag1,
  mean_Product_reviews_presto_lag1,
  meanprodstar_presto_lag1,
  meansalerank_presto_lag1,
  meansaleranksubcat_presto_lag1,
  NUM_prestoused3p_lag1,
  NUM_prestousedamzn_lag1,
  seasonal_sale
)))


#VARIANCE CHECK
# Sample variance of every candidate column (named by column). vapply() also
# copes with an empty frame, where a 1:ncol() loop would index column 1 and 0.
variance_X = vapply(data_numsellers_presto, var, numeric(1))

nonzero_var = sum(variance_X != 0) #number of variables with non-zero variance

#Retaining variables with non-zero variance
# drop = FALSE keeps a data frame even when a single column survives; without
# it the result collapses to a bare vector and can no longer be passed as
# gam(data = ...).
trainX_retained = data_numsellers_presto[, variance_X != 0, drop = FALSE]

data_numsellers_presto = trainX_retained

# Presto number-of-sellers model: Gaussian family, identity link, fitted by
# REML. The formula has no s() terms, so every predictor enters linearly.
# Commented-out terms are candidates currently excluded from the model.
formula_numsellers_presto = NUM_SELLERS_Presto ~
  decr10per_amzn_presto +
  decr5per_amzn_presto +
  # incr10per_amzn_presto +
  #  incr5per_amzn_presto +
  mean_Product_reviews_presto_lag1 +
  meanprodstar_presto_lag1 +
  mean_ans_questions_presto_lag1 +
  priceAmzn_presto_lag +
  #meansalerank_presto_lag1 +
  seasonal_sale +
  weekend +
  mean_buyboxprice_presto_lag1 +
  mean_nonbbox3p_presto_lag
  # meansaleranksubcat_presto_lag1 +
  # presto_bottombrandlag1

# data_numsellers_presto holds predictors only. Add the response so the model
# frame is built from one data frame instead of relying on attach(modeldata).
fitdata_numsellers_presto = data_numsellers_presto
if ("NUM_SELLERS_Presto" %in% names(modeldata)) {
  fitdata_numsellers_presto[["NUM_SELLERS_Presto"]] = modeldata[["NUM_SELLERS_Presto"]]
}

# Any variable missing from the fit data is looked up in the calling
# environment / search path instead. That covers predictors the zero-variance
# filter removed as well as formula terms that were never put into the
# predictor frame, so make it visible.
outside_terms_presto = setdiff(all.vars(formula_numsellers_presto),
                               names(fitdata_numsellers_presto))
if (length(outside_terms_presto) > 0) {
  warning("Presto model: variables not in the fit data (resolved from the ",
          "environment instead): ",
          paste(outside_terms_presto, collapse = ", "), call. = FALSE)
}

gam_numsellers_presto = gam(formula_numsellers_presto,
                            family = gaussian(link ='identity'),
                            data = fitdata_numsellers_presto,
                            method = "REML",
                            optimizer = c("outer","newton"),
                            fit = TRUE)


summary(gam_numsellers_presto)
                            

#SECURA: Number of sellers

# Candidate predictors for the Secura number-of-sellers model.
# Columns are taken from modeldata explicitly (with()) rather than through the
# attached search path, so a same-named global variable cannot shadow them.
# cbind() builds a matrix first, so every column is coerced to one common type;
# assumes all listed columns are numeric - TODO confirm.
data_numsellers_secura = with(modeldata, data.frame(cbind(
  incr10per_3p_sec,
  incr20per_3p_sec,
  incr5per_3p_sec,
  decr10per_3p_sec,
  decr20per_3p_sec,
  decr5per_3p_sec,
  buyboxprice_secura_lag1,
  nonbbox3pprice_secura_lag,
  weekend,
  #more additional variables - in absence of review text
  priceclust4_secura_lag,
  Answered_Questionssecura_lag1,
  Product_reviews_secura_lag1,
  productstar_secura_lag1,
  salerank_secura_lag1,
  saleranksubcat_secura_lag1,
  num_securaused3p_lag1,
  num_securausedamzn_lag1,
  seasonal_sale
)))


#VARIANCE CHECK
# Sample variance of every candidate column (named by column). vapply() also
# copes with an empty frame, where a 1:ncol() loop would index column 1 and 0.
variance_X = vapply(data_numsellers_secura, var, numeric(1))

nonzero_var = sum(variance_X != 0) #number of variables with non-zero variance

#Retaining variables with non-zero variance
# drop = FALSE keeps a data frame even when a single column survives; without
# it the result collapses to a bare vector and can no longer be passed as
# gam(data = ...).
trainX_retained = data_numsellers_secura[, variance_X != 0, drop = FALSE]

data_numsellers_secura = trainX_retained

# Secura number-of-sellers model: Gaussian family, identity link, fitted by
# REML. The formula has no s() terms, so every predictor enters linearly.
formula_numsellers_secura = NUM_SELLERS_Sec ~
  Answered_Questionssecura_lag1 +
  buyboxprice_secura_lag1 +
  nonbbox3pprice_secura_lag +
  Product_reviews_secura_lag1 +
  saleranksubcat_secura_lag1 +
  productstar_secura_lag1 +
  seasonal_sale +
  weekend +
  salerank_secura_lag1

# data_numsellers_secura holds predictors only. Add the response so the model
# frame is built from one data frame instead of relying on attach(modeldata).
fitdata_numsellers_secura = data_numsellers_secura
if ("NUM_SELLERS_Sec" %in% names(modeldata)) {
  fitdata_numsellers_secura[["NUM_SELLERS_Sec"]] = modeldata[["NUM_SELLERS_Sec"]]
}

# Any variable missing from the fit data is looked up in the calling
# environment / search path instead. That silently re-admits predictors the
# zero-variance filter removed, so make it visible.
outside_terms_secura = setdiff(all.vars(formula_numsellers_secura),
                               names(fitdata_numsellers_secura))
if (length(outside_terms_secura) > 0) {
  warning("Secura model: variables not in the fit data (resolved from the ",
          "environment instead): ",
          paste(outside_terms_secura, collapse = ", "), call. = FALSE)
}

gam_numsellers_secura = gam(formula_numsellers_secura,
                            family = gaussian(link ='identity'),
                            data = fitdata_numsellers_secura,
                            method = "REML",
                            optimizer = c("outer","newton"),
                            fit = TRUE)


summary(gam_numsellers_secura)

                            


